#!/usr/bin/env python

import nltk
from nltk.corpus import gutenberg

# Analyse the first text in the Gutenberg corpus listing.
fileid = gutenberg.fileids()[0]

# Word frequencies: keep purely alphabetic tokens only (drops punctuation and
# numbers). Tokens are NOT case-folded, so "The" and "the" count separately.
all_words = gutenberg.words(fileid)
filtered_words = [w for w in all_words if w.isalpha()]
fdist = nltk.FreqDist(filtered_words)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(fdist.most_common(50))

# Character frequencies: alphabetic characters of the raw text, lowercased so
# that upper- and lower-case forms of a letter are counted together.
raw = gutenberg.raw(fileid)
fdist = nltk.FreqDist(ch.lower() for ch in raw if ch.isalpha())
print(fdist.most_common(10))

# Letter frequencies per text: each of the first five Gutenberg files is a
# condition, and every alphabetic character (lowercased) is one sample.
letter_pairs = (
    (text_id, letter.lower())
    for text_id in gutenberg.fileids()[:5]
    for letter in gutenberg.raw(text_id)
    if letter.isalpha()
)
cfd = nltk.ConditionalFreqDist(letter_pairs)

# Show the per-text counts first as a table, then as a plot.
cfd.tabulate()
cfd.plot()
